package com.mano.web.webmagic;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.HttpHost;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.redis.core.RedisTemplate;
import org.springframework.scheduling.annotation.Scheduled;

import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.net.InetSocketAddress;
import java.net.Proxy;
import java.net.URL;
import java.net.URLConnection;
import java.util.List;

/**
 * @Author: zj
 * @Description: 爬免费ip 并检测可用性  配置 IP代理池
 * @Date: Created in 9:26 2020/9/4
 * @Modified By:
 */
public class UpdateIp {

    public static final String UPDATE_IP="update_ip_webmagic";
    public static final String TEST_URL="http://www.baidu.com";
    public static final String GRAB_IP_URL="https://www.xicidaili.com/nn";

    @Autowired
    private RedisTemplate redisTemplate;

    /**
     * 检测ip 可用性
     */
    @Scheduled(cron = "*/20 * * * * ?")
    public void checkUse(){
        List<String> range = redisTemplate.opsForList().range(UPDATE_IP, 0, -1);
        for(String ip:range){
            if(ifUseless(ip)){
                System.out.println(ip + "  从redis移除");
                redisTemplate.opsForList().remove(UPDATE_IP,0,ip);
            }
        }
    }

    @Scheduled(cron = "*/15 * * * * ?")
    public void grabIpFromRemote(){
        String str = null;

        try {
            Document document = Jsoup.connect(GRAB_IP_URL).timeout(3000).get();
            Elements tags = document.select("#ip_list > tbody > tr");

            for(Element element:tags){
                //取得ip地址节点
                Elements tdChilds = element.select("tr > td:nth-child(2)");
                //取得端口号节点
                Elements tcpd = element.select("tr > td:nth-child(3)");
                if (StringUtils.isNotBlank(tdChilds.text()) && StringUtils.isNotBlank(tcpd.text())) {
                    str = tdChilds.text()+":"+tcpd.text();
                    if(!ifUseless(str)){
                        List<String> range = redisTemplate.opsForList().range(UPDATE_IP, 0, -1);
                        if(!range.contains(str)){
                            System.out.println(str + "  存进redis");
                            if(redisTemplate.opsForList().size(UPDATE_IP)>100){
                                redisTemplate.opsForList().rightPopAndLeftPush(UPDATE_IP,str);
                            }else{
                                redisTemplate.opsForList().rightPush(UPDATE_IP,str);
                            }
                        }
                    }
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * 判断ip 是否失效
     * @param ip
     * @return
     */
    private boolean ifUseless(String ip) {
        String[] split = ip.split(":");
        URL url = null;
        InputStream in = null;

        try{
            url = new URL(TEST_URL);
            InetSocketAddress address = new InetSocketAddress(split[0], Integer.parseInt(split[1]));
            Proxy proxy = new Proxy(Proxy.Type.HTTP, address);


            try{
                URLConnection connection = url.openConnection(proxy);
                connection.setConnectTimeout(2000);
                in = connection.getInputStream();
            }catch (Exception e){
                // 报错直接不可用
                return true;
            }
            String resp = IOUtils.toString(in);
            if(resp.indexOf("baidu")>0){
                return false;
            }
            return true;

        }catch(Exception e){
            return true;
        }finally {
            try {
                in.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

    }

    /**
     *  判断ip 是否可用
     * @param ip
     * @return
     */
    public boolean ifIpAble(String ip){
        String[] split = ip.split(":");

        CloseableHttpClient httpClient = HttpClients.createDefault();
        CloseableHttpResponse resp = null;

        HttpHost proxy = new HttpHost(split[0], Integer.parseInt(split[1]));
        RequestConfig requestConfig = RequestConfig.custom().setProxy(proxy).setConnectTimeout(3000).setSocketTimeout(3000).build();

        HttpGet httpGet = new HttpGet(TEST_URL);
        httpGet.setConfig(requestConfig);

        httpGet.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;" +
                "q=0.9,image/webp,*/*;q=0.8");
        httpGet.setHeader("Accept-Encoding", "gzip, deflate, sdch");
        httpGet.setHeader("Accept-Language", "zh-CN,zh;q=0.8");
        httpGet.setHeader("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit" +
                "/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36");

        try {
            resp = httpClient.execute(httpGet);
            String respStr = EntityUtils.toString(resp.getEntity(), "UTF-8");
            if(respStr.indexOf("baidu")>0){
                return true;
            }
            return false;
        } catch (IOException e) {
            System.err.println(ip+"：ip 地址 不可用");
           return false;
        }finally {
            try {
                if (httpClient != null) {
                    httpClient.close();
                }
                if (resp != null) {
                    resp.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }


}
